import requests  # 导入requests包
import json
from bs4 import BeautifulSoup
import pandas as pd
# China Telecom: http://www.189.cn/dqmh/system.do?operate=getCookie&city=ah_hf
# China Unicom (one page per province): http://s.10010.com/zj/feeset/

# http://c.biancheng.net/view/2011.html
# BeautifulSoup official documentation:
# https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/
# Fetch provinces, cities and counties
class yidong(object):
    """Scraper for China Mobile tariff packages listed on shop.10086.cn.

    ``loadAll`` walks every province and city offered by the shop's region
    selector, collects the packages shown for each city and exports them to an
    Excel workbook.  ``loadOne`` prints the packages of one fixed listing page.

    Attributes:
        df: ``pandas.DataFrame`` holding the most recently collected rows
            (columns ``COLUMNS``); empty until ``loadAll`` has run.
    """

    # Listing page whose header contains the province selector.
    PROVINCE_PAGE_URL = 'https://shop.10086.cn/list/140_100_100_0_0_0_0.html'
    # AJAX endpoint that returns the cities of one province as JSON.
    CITY_API_URL = 'https://shop.10086.cn/ajax/region/singleregion.json'
    # Listing page template: province id, then city id.
    CITY_PACKAGES_URL = 'https://shop.10086.cn/list/140_{}_{}_1_0_0_0_0.html'
    # Fixed listing page printed by loadOne().
    SINGLE_PAGE_URL = "https://shop.10086.cn/list/140_200_200_0_0_0_0.html"

    PROVINCE_SELECTOR = ('#header>div.search_mlogo.clearfix>div.logoWhere.clearfix'
                         '>div.floatleft.clearfix>div.area.province>ul>li')
    PACKAGE_SELECTOR = ('div.content.clearfix.position>div.maincolumn.floatright'
                        '>div.packageList>ul.clearfix.withhover>li>dl.clearfix'
                        '>dt.position>a')

    # Output workbook and its column order.
    EXCEL_PATH = '移动套餐.xlsx'
    COLUMNS = ('省份', '城市', '套餐名称', '套餐链接')

    # Seconds to wait for any single HTTP request; without a timeout a stalled
    # connection would block the crawl indefinitely.
    REQUEST_TIMEOUT = 30

    # Headers sent with the city AJAX call.  Content-Length and Accept-Encoding
    # are deliberately left to requests: the former depends on the actual body,
    # and the latter must only advertise encodings that can be decoded locally.
    AJAX_HEADERS = {
        "Host": "shop.10086.cn",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "Connection": "keep-alive",
        "Referer": "https://shop.10086.cn/list/140_230_230_1_0_0_3_0.html",
    }

    def __init__(self, master=None):
        """Create the scraper with an empty result table.

        Args:
            master: Unused; accepted only for backward compatibility.
        """
        # Initialise df here so it can be read safely before loadAll() runs.
        self.df = pd.DataFrame(columns=list(self.COLUMNS))
        print("initMethod start")

    def _fetch_soup(self, url):
        """Download ``url`` with a GET request and parse the body with lxml."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(response.text, 'lxml')

    def _load_cities(self, province_id):
        """Return the city records of ``province_id`` from the AJAX endpoint.

        Raises:
            requests.HTTPError: if the endpoint answers with an error status.
        """
        response = requests.post(
            self.CITY_API_URL,
            data={"province_id": province_id},
            headers=self.AJAX_HEADERS,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        payload = json.loads(response.text)
        print(payload.get("data"))
        # A payload without "data" (e.g. an error reply) yields no cities.
        return payload.get("data") or []

    def _load_packages(self, url):
        """Return ``(title, href)`` pairs for every package link on ``url``."""
        links = self._fetch_soup(url).select(self.PACKAGE_SELECTOR)
        return [(link.get('title'), link.get('href')) for link in links]

    def loadAll(self):
        """Crawl every province and city and export the packages to Excel.

        The collected rows are stored in ``self.df`` and, when at least one
        row was collected, written once to ``EXCEL_PATH``.  This happens in a
        ``finally`` clause, so rows gathered before a failure are still saved
        while the original exception continues to propagate.
        """
        rows = []
        try:
            province_page = self._fetch_soup(self.PROVINCE_PAGE_URL)
            for item in province_page.select(self.PROVINCE_SELECTOR):
                for anchor in item.find_all('a'):
                    province_id = anchor.get("province_id")
                    if province_id is None:
                        # Not a province link; there is nothing to query.
                        continue
                    province_name = anchor.get_text()
                    # TODO: persist province info to a database such as MongoDB.
                    print(province_name, ",", province_id)

                    for city in self._load_cities(province_id):
                        print(city['extend'], city['city_name'], city['city_id'])
                        city_url = self.CITY_PACKAGES_URL.format(
                            province_id, city['city_id'])
                        for title, href in self._load_packages(city_url):
                            print('套餐：', title, href)
                            rows.append({
                                '省份': str(province_name),
                                '城市': str(city['city_name']),
                                '套餐名称': str(title),
                                '套餐链接': str(href),
                            })
        finally:
            self.df = pd.DataFrame(rows, columns=list(self.COLUMNS))
            if rows:
                self.df.to_excel(self.EXCEL_PATH)

    def loadOne(self):
        """Print the title and link of every package on the fixed listing page."""
        for title, href in self._load_packages(self.SINGLE_PAGE_URL):
            print('套餐：', title, href)
# Keep the scraper instance at module level so the name `app` stays available
# to importers; constructing it performs no network I/O.
app = yidong()

# Run the full crawl (many HTTP requests plus the Excel export) only when this
# file is executed as a script, so that importing it does not trigger a crawl.
if __name__ == "__main__":
    app.loadAll()
